#!/usr/bin/python
# -*- coding: utf-8 -*-
#****************************************************************#
# ScriptName: ./extract_calltrace.py
# Author: $SHTERM_REAL_USER@alibaba-inc.com
# Create Date: 2021-11-09 17:21
# Modify Author: $SHTERM_REAL_USER@alibaba-inc.com
# Modify Date: 2021-12-16 14:28
# Function: Extract the faulting function, RIP, module and call trace from a kernel dmesg log
#***************************************************************#
import re
# Function names that extract_calltrace() never records as call-trace frames.
ignore_funcs = [
    "schedule",
    "schedule_timeout",
    "ret_from_fork",
    "kthread",
    "do_syscall_64",
    "entry_SYSCALL_64_after_swapgs",
    "system_call_fastpath",
    "fastpath",
    "entry_SYSCALL_64_after_hwframe",
    "page_fault",
    "do_page_fault",
    "_do_page_fault",
    "worker_thread",
    "start_secondary",
    "cpu_startup_entry",
    "arch_cpu_idle",
    "default_idle",
    "do_IRQ",
    "common_interrupt",
    "irq_exit",
    "do_softirq",
    "__schedule",
    "io_schedule_timeout",
    "io_schedule",
    "dump_stack",
    "exit_to_usermode_loop",
    "stub_clone",
    "schedule_preempt_disabled",
    "oom_kill_process",
    "unwind_backtrace",
    "dump_header",
    "show_stack",
    "dump_backtrace",
    "panic",
    "watchdog_timer_fn",
    "nmi_panic",
    "watchdog_overflow_callback",
    "__perf_event_overflow",
    "perf_event_overflow",
    "intel_pmu_handle_irq",
    "perf_event_nmi_handler",
    "nmi_handle",
    "do_nmi",
    "end_repeat_nmi",
    "watchdog",
    "__hrtimer_run_queues",
    "hrtimer_interrupt",
    "local_apic_timer_interrupt",
    "smp_apic_timer_interrupt",
    "apic_timer_interrupt",
]

# Leading "[ seconds.fraction]" stamp; group 1 is the integer seconds part.
ltime_pattern = re.compile(r'^\[\s*([0-9]+)\..*\]')

# "RIP: 0010:" line carrying a bracketed address:
# group 1 = hex address, group 2 = the text after it (symbol+offset ...).
rip_pattern = re.compile(r'\[\s*\S+\] RIP: 0010:.*\[<([0-9a-f]+)>\] (.+)')
# "RIP: 0010:<token>" line without a bracketed address; group 1 = the token.
rip_pattern_1 = re.compile(r'\[\s*\S+\] RIP: 0010:(\S+)')
# "RIP ..." line (no colon required) with a bracketed address; same groups
# as rip_pattern.
rip_pattern_2 = re.compile(r'\[\s*\S+\] RIP .*\[<([0-9a-f]+)>\] (.+)')
# Trailing " [name]" at the very end of a RIP line; group 1 = name.
ripmod_pattern = re.compile(r'\[\s*\S+\] RIP.* \[(\S+)\]$')

# "kernel BUG at <location>!"; group 1 = location.
bugat_pattern = re.compile(r'.+\] kernel BUG at (\S+)!')
# Applied from the "Comm:" offset of a line: group 1 = command name,
# group 2 = taint marker, group 3 = the token right before " #".
ver_pattern = re.compile(r'Comm: (\S*).*(Tainted:|Not tainted).* (\S+) #')
# "[last unloaded: <name>]"; group 1 = name.
unload_pattern = re.compile(r'\[last unloaded: (\S+)\]')
# Headline of a crash report; group 1 = whole headline, group 2 = keyword.
title_pattern = re.compile(r'\[\s*\S+\] ((BUG:|Unable to handle kernel|Kernel panic|Bad pagetable:|divide error:|kernel BUG at|general protection fault:) .+)')
# First two dot-terminated numeric components, e.g. "4.19." -> ("4", "19").
vertype_pattern = re.compile(r'(\d+)\.(\d+)\.')
# "Linux version <release> ..." boot banner; group 1 = release.
linux_ver_pattern = re.compile(r'\[\s*\S+\] Linux version (\S*).+')

# Not referenced by the code visible in this file; kept for importers.
last_strhost = ''

# Stack-trace entries in "symbol+0xOFF/0xSIZE" form.
# group 1 = symbol, group 2 = OFF, group 3 = SIZE.
# line_pattern: the entry is preceded by a bracketed field such as "[<addr>]".
line_pattern = re.compile(r'.+[0-9]+\].+\[.*\][? ]* (\S+)\+0x(\S+)/0x(\S+)')
# line_pattern_1: the entry directly follows the timestamp.
line_pattern_1 = re.compile(r'.+[0-9]+\][? ]*(\S+)\+0x(\S+)/0x(\S+)')
# line_pattern_2: "<token> symbol" form; only group 1 (the symbol) exists.
line_pattern_2 = re.compile(r'.*<[A-Za-z0-9]+>[? ]*(\S+)')

def get_column_value(column, line):
    """Scan one dmesg line and record any crash fields it carries in *column*.

    The dict is updated in place; nothing is returned.  Depending on what the
    line matches, these keys may be written: 'rip', 'func_name', 'ripmod',
    'bugat', 'comm', 'ver', 'unload', 'title' and the parking slots
    'tmp_func_name', 'tmp_rip' and 'tmp_ripmod'.

    *column* must already hold 'func_name', 'rip', 'ripmod' and 'ver' (the
    callers in this file initialise them to ''): they are read when a title
    or "Linux version" line is seen, and a missing key raises KeyError.
    """
    # --- RIP line: faulting address, symbol and (optionally) module -------
    match = rip_pattern.match(line) or rip_pattern_2.match(line)
    if match:
        column['rip'] = match.group(1)
        symbol = match.group(2)
    else:
        # Fallback form without a bracketed address: only the symbol is known.
        match = rip_pattern_1.match(line)
        symbol = match.group(1) if match else None

    if symbol is not None:
        # Keep the bare name: drop "+0xOFF/0xSIZE" and any ".suffix".
        column['func_name'] = symbol.split('+')[0].split('.')[0]
        ripmod_match = ripmod_pattern.match(line.strip())
        if ripmod_match:
            column['ripmod'] = ripmod_match.group(1)

    # --- "kernel BUG at <location>!" ---------------------------------------
    match = bugat_pattern.match(line)
    if match:
        column['bugat'] = match.group(1)

    # --- "Comm: <name> ... <release> #" ------------------------------------
    # str.find() returns 0 for a hit at the start of the line, so the test
    # must be ">= 0"; "> 0" would skip such lines.
    idx = line.find('Comm:')
    if idx >= 0:
        match = ver_pattern.match(line, idx)
        if match:
            column['comm'] = match.group(1)
            column['ver'] = match.group(3)

    # --- "[last unloaded: <module>]" ---------------------------------------
    idx = line.find('[last unloaded:')
    if idx >= 0:
        match = unload_pattern.match(line, idx)
        if match:
            column['unload'] = match.group(1)

    # --- crash headline ----------------------------------------------------
    match = title_pattern.match(line)
    if match:
        column['title'] = match.group(1)
        # Park whatever RIP data has been seen so far (possibly empty) and
        # clear the live fields, so a RIP line that follows this headline
        # takes precedence.  The callers restore the parked values when no
        # later RIP line fills 'func_name' again.
        column['tmp_func_name'] = column['func_name']
        column['tmp_rip'] = column['rip']
        column['tmp_ripmod'] = column['ripmod']
        column['func_name'] = ''
        column['rip'] = ''
        column['ripmod'] = ''

    # --- boot banner: only used when no "Comm:" line supplied a version ----
    match = linux_ver_pattern.match(line)
    if match and not column['ver']:
        column['ver'] = match.group(1)

def _strip_stack_markers(line):
    """Remove the span from the first ' <' through the last '>' that is
    followed by a space; lines lacking either marker are returned unchanged.
    """
    if line.find('> ') >= 0 and line.find(' <') >= 0:
        return line[0:line.find(' <')] + line[line.rfind('> ') + 1:]
    return line


def _note_softlockup_module(column, line, func, frames):
    """Set column['softlockup_modcheck'] to the bracketed text that follows
    *func* on *line*, but only if it is still empty and the last entry of
    *frames* contains 'spin_lock'.
    """
    if column['softlockup_modcheck'] != '' or len(frames) == 0:
        return
    if frames[-1].find('spin_lock') < 0:
        return
    idx = line.find('[', line.find(func))
    if idx >= 0:
        column['softlockup_modcheck'] = line[idx + 1:line.find(']', idx)]


def extract_calltrace(column, dmesg):
    """Collect call-trace frames from *dmesg* and store them in *column*.

    The text is walked line by line.  A frame is the symbol of a stack-trace
    entry with any ".suffix" removed; entries are skipped when the symbol is
    in ignore_funcs, equals column['func_name'], or was already collected.
    Collection restarts at every line containing column['title'] and at
    "Call Trace:", "<<EOE>>", "<EOE>" and "<IRQ>" markers (also at "<EOI>"
    when the title contains 'unrecovered softlockup').  Once a title has been
    seen and more than two frames are collected, the first line that is not a
    stack entry ends the walk.

    Entries marked with '?' are only used when the RIP is unusable (no
    rip/func_name, or func_name starting with '0x') and the log indicates a
    bad RIP, and only for the first run of such entries in a trace.

    Reads:  column['title'], column['rip'], column['func_name'].
    Writes: column['softlockup_modcheck'], column['func_name'] (only when it
            is empty), column['calltrace_list'] (all frames) and
            column['calltrace'] (the first two frames joined with '$').
    """
    lines = dmesg.split('\n')
    frames = []          # frames of the trace currently being collected
    saved_frames = []    # frames collected before each title line
    workqueue = ''
    seen_title = False
    nocalltrace = True
    column['softlockup_modcheck'] = ''
    hung_flag = column['title'].find('unrecovered softlockup') >= 0

    invalidrip = ((column['rip'] == '' and column['func_name'] == '')
                  or column['func_name'].startswith('0x'))
    badrip = dmesg.find('Code:  Bad RIP value.') >= 0

    question_continue = True
    question_count = 0

    for lineno, r in enumerate(lines):
        # Title line: whatever was collected so far belongs to an earlier
        # report; keep it only as a fallback and start over.
        if column['title'] != "" and r.find(column['title']) >= 0:
            nocalltrace = True
            seen_title = True
            saved_frames.extend(frames)
            del frames[:]
            column['softlockup_modcheck'] = ''
            question_count = 0
            question_continue = True
            continue

        idx = r.find('Workqueue: events ')
        if idx >= 0:
            workqueue = r[idx + 18:]

        idx = r.find('EFLAGS: ')
        if idx >= 0:
            try:
                eflags = int(r[idx + 8:], 16)
            except ValueError:
                # Trailing text after the value: leave badrip untouched.
                eflags = None
            # Bit 9 of EFLAGS (the x86 interrupt-enable flag) being clear
            # marks the RIP as unreliable.
            if eflags is not None and (eflags >> 9) % 2 == 0:
                badrip = True

        # With no function name yet, take it from the stack entry on the
        # line just before the "<<EOE>>" marker.  The position comes from
        # enumerate(): looking the line up by value would return the first
        # identical line, and index 0 would wrap around to the last line.
        if r.find("<<EOE>>") >= 0 and column['func_name'] == '' and lineno > 0:
            prev = lines[lineno - 1]
            m = line_pattern.match(prev) or line_pattern_1.match(prev)
            if m:
                column['func_name'] = m.group(1)

        if r.find('<IRQ>') >= 0:
            badrip = True

        if hung_flag and r.find('<EOI>') >= 0:
            r = _strip_stack_markers(r)
            del frames[:]
            column['softlockup_modcheck'] = ''
            question_count = 0
            question_continue = True

        # NOTE(review): the first three tests use "> 0", so a marker at
        # column 0 (a line without a timestamp) does not restart the trace.
        if (r.find("Call Trace:") > 0 or r.find("<<EOE>>") > 0
                or r.find("<EOE>") > 0 or r.find("<IRQ>") >= 0):
            r = _strip_stack_markers(r)
            del frames[:]
            column['softlockup_modcheck'] = ''
            question_count = 0
            question_continue = True

        # Entries marked '?' never reach the regular frame handling below.
        if r.find('?') >= 0:
            if workqueue != '' and r.find(workqueue) >= 0:
                frames.append(workqueue)
            if invalidrip and badrip and question_continue:
                m2 = line_pattern.match(r)
                bracketed = m2 is not None
                if m2 is None:
                    m2 = line_pattern_1.match(r)
                if m2:
                    name = m2.group(1)
                    base = name.split('.')[0]
                    if (base == column['func_name'] or name in ignore_funcs
                            or base in frames):
                        continue
                    nocalltrace = False
                    # Entries whose offset equals the size field
                    # (symbol+0xN/0xN) are not recorded.
                    if m2.group(2) != m2.group(3):
                        if bracketed:
                            _note_softlockup_module(column, r, base, frames)
                            frames.append(base)
                        else:
                            # NOTE(review): here the frame is appended before
                            # the spin_lock check, so the check inspects this
                            # frame rather than the previous one.  Order kept
                            # as found; confirm whether it is intentional.
                            frames.append(base)
                            _note_softlockup_module(column, r, base, frames)
                        question_count += 1
            continue

        # A regular line after at least one accepted '?' entry closes the
        # window in which '?' entries are considered.
        if question_count > 0:
            question_continue = False

        m = (line_pattern.match(r) or line_pattern_1.match(r)
             or line_pattern_2.match(r))
        if m is None:
            if len(frames) > 2 and seen_title:
                break
            continue

        nocalltrace = False
        name = m.group(1)
        base = name.split('.')[0]
        # 'panic' is part of ignore_funcs, so panic frames are skipped here
        # like every other ignored symbol.
        if (base == column['func_name'] or name in ignore_funcs
                or base in frames):
            continue
        _note_softlockup_module(column, r, base, frames)
        frames.append(base)

    # Nothing that looked like a stack entry after the last title: fall back
    # to the frames gathered before it.
    if len(frames) == 0 and nocalltrace:
        frames = saved_frames

    if column['func_name'] == '' and len(frames) > 0:
        column['func_name'] = frames[0]
        del frames[0]

    column['calltrace_list'] = list(frames)
    column['calltrace'] = '$'.join(column['calltrace_list'][0:2])


def extract_dmesg(column, dmesg):
    """Fill *column* with the crash fields parsed from the dmesg text.

    Every result key is reset first, so a dict reused for several logs never
    carries values over from a previous one.  Each line is then fed to
    get_column_value(), the module list is taken from the
    "Modules linked in" line, and extract_calltrace() adds the call trace.

    Best effort: any exception is printed with its traceback and swallowed,
    leaving *column* partially filled.
    """
    try:
        for key in ('func_name', 'rip', 'title', 'bugat', 'ripmod', 'ver',
                    'comm', 'unload', 'softlockup_modcheck', 'modules'):
            column[key] = ''
        # Parked RIP values from an earlier log must not be restored below.
        for key in ('tmp_func_name', 'tmp_rip', 'tmp_ripmod'):
            column.pop(key, None)

        for line in dmesg.splitlines():
            if line.find('Modules linked in') >= 0:
                column['modules'] = line[line.find(':')+1:]
            get_column_value(column, line)

        # No RIP line followed the last title: fall back to the values that
        # get_column_value() parked when it saw that title.
        if column['func_name'] == '' and column.get('tmp_func_name', '') != '':
            column['func_name'] = column['tmp_func_name']
            column['rip'] = column['tmp_rip']
            column['ripmod'] = column['tmp_ripmod']

        extract_calltrace(column, dmesg)
    except Exception:
        import traceback
        traceback.print_exc()


def main(logfile, column):
    """Read the dmesg log stored at *logfile* and fill *column* from it.

    The parsing itself is done by extract_dmesg().  If the file cannot be
    read, the traceback is printed and *column* is left untouched.
    """
    try:
        with open(logfile, 'r') as fin:
            dmesg = fin.read()
    except Exception:
        import traceback
        traceback.print_exc()
        return

    extract_dmesg(column, dmesg)

if __name__ == '__main__':
    # Ad-hoc run against a dmesg dump in the current directory.
    logfile = './dmesg.txt'
    column = {'softlockup_modcheck': ''}
    main(logfile, column)
    add_content = ''
    print(column)
    # 'ripmod' is absent when the log could not be read and 'modules' is only
    # set when a "Modules linked in" line exists, so read both defensively.
    ripmod = column.get('ripmod', '')
    if ripmod != '':
        # A module listed with the "(OE)" marker right after its name.
        user_loaded = ' %s(OE)' % ripmod
        if column.get('modules', '').find(user_loaded) >= 0:
            # Message text: "user-loaded module <name> caused the crash".
            # NOTE(review): add_content is built but not used afterwards.
            add_content = '%s\n用户加载模块%s导致宕机' % (add_content, ripmod)
